#!/usr/bin/env ruby

# Copyright (C) 2008 Rapid7, Inc.

#
# This script extracts the forms from the main page of each
# web site in a list. The output of this can be used with
# Metasploit (and other tools) to obtain the saved form data
# of these domains.
#

require 'rubygems'  # install rubygems
require 'hpricot'   # gem install hpricot
require 'open-uri'  # adds #open to URI::HTTP
require 'timeout'
require 'uri'

# Prints the command-line synopsis to stderr and terminates the script.
#
# This is only invoked when a required argument is missing, so the process
# exits with a non-zero status to let calling shells/scripts detect the error.
# Never returns.
def usage
  $stderr.puts "#{$0} [site list] [output-dir]"
  exit(1)
end

# Renders one form element as a skeleton string: the opening <form> tag with
# its attributes (single quotes removed from the values), followed by every
# descendant <input> with its "value" attribute and any attribute whose value
# starts with "http" blanked out, followed by "</form>".
#
# form - an element yielded by doc.search("//form").
#
# Returns the skeleton as a String.
def form_skeleton(form)
  res = "<form"

  # NOTE(review): newer Hpricot releases appear to return an Attributes
  # wrapper here instead of a Hash; to_hash is available on both - confirm
  # against the installed Hpricot version.
  form.attributes.to_hash.each do |name, value|
    res << " #{name}='#{value.to_s.gsub("'", "")}'"
  end
  res << "> "

  form.search("//input") do |inp|
    # Iterate over a snapshot of the keys because the attributes are
    # rewritten inside the loop.
    inp.attributes.to_hash.keys.each do |ikey|
      if ikey.downcase == "value" || inp.attributes[ikey] =~ /^http/i
        inp[ikey] = ""
      end
    end

    res << inp.to_html
  end

  res << "</form>"
end

sitelist = ARGV.shift || usage
output   = ARGV.shift || usage

File.readlines(sitelist).each do |site|
  site.strip!
  next if site.empty?
  next if site =~ /^#/

  out = File.join(output, site + ".txt")

  begin
    # "w" truncates any previous result for this site; the block form
    # guarantees the handle is closed even if the scan is interrupted.
    File.open(out, "w") do |fd|
      # Try the bare domain first and fall back to the www. host on failure.
      ["", "www."].each do |prefix|
        begin
          # Build the whole result inside the timeout and write it afterwards,
          # so a timeout half-way through a page cannot leave partial output
          # that the next prefix would then duplicate.
          forms = Timeout.timeout(10) do
            doc = URI.parse("http://#{prefix}#{site}/").open { |page| Hpricot(page) }

            html = ""
            doc.search("//form").each { |form| html << form_skeleton(form) }
            html
          end

          fd.write(forms)
          break
        rescue ::Timeout::Error
          $stderr.puts "#{prefix}#{site} timed out"
        rescue ::Interrupt
          raise
        rescue ::Exception => e
          # Deliberate catch-all: one misbehaving site must not abort the
          # scan of the remaining list.
          $stderr.puts "#{prefix}#{site} #{e.class} #{e}"
        end
      end
    end
  ensure
    # Do not leave empty result files behind (no forms found, every attempt
    # failed, or the run was interrupted).
    File.unlink(out) if File.zero?(out)
  end
end
